#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : main.py
# @Author: Richard Chiming Xu
# @Date  : 2022/3/17
# @Desc  : 服务入口

import tornado.ioloop
import tornado.web
import json
import joblib
import re
import jieba

# Load the stop-word list
def get_stopwords(path='../baidu_stopwords.txt'):
    """Read stop words, one per line, from *path*.

    Args:
        path: UTF-8 text file with one stop word per line. Defaults to
            the project's baidu stop-word list for backward compatibility.

    Returns:
        list[str]: the stop words with trailing newlines removed
        (blank lines are kept as empty strings, matching the old
        ``replace('\\n', '')`` behavior).
    """
    with open(path, 'r', encoding='utf-8') as f:
        # Each line carries at most one trailing newline, so rstrip('\n')
        # is equivalent to the old replace('\n', '').
        return [line.rstrip('\n') for line in f]


# Tokenize with jieba, dropping stop words
def cut(content, stop_words):
    """Tokenize *content* with jieba (full mode) and drop stop words.

    Args:
        content: Raw text to tokenize.
        stop_words: Iterable of tokens to exclude from the output.

    Returns:
        str: the surviving tokens joined by single spaces.

    Raises:
        AttributeError: re-raised (after printing the text) when jieba
            cannot process *content*, e.g. it is not a string.
    """
    # Strip punctuation/symbols before segmentation. The raw string keeps
    # the regex byte-identical while avoiding the invalid escape sequences
    # ("\s", "\.", ...) that raise SyntaxWarning on Python 3.12+.
    content = re.sub(r"[\s+\.\!\/_,$%^*(+\"\']+|[+——！，。？、~@#￥%……&*（）]", "", content)

    # Build a set once for O(1) membership tests instead of scanning the
    # stop-word list for every token.
    excluded = set(stop_words)
    try:
        tokens = [t for t in jieba.lcut(content, cut_all=True) if t not in excluded]
    except AttributeError:
        # Surface the offending input before propagating, to ease debugging.
        print(content)
        raise
    return ' '.join(tokens)



class MainHandler(tornado.web.RequestHandler):
    """Classify the text from a JSON POST body with the trained models.

    Expects a request body of the form ``{"text": "..."}`` and writes the
    classifier's prediction (``str()`` of the predict result) back as the
    response body.
    """

    # Process-wide caches: the original code re-loaded both pickled models
    # and the stop-word file from disk on every request, which is pure
    # overhead. Loading lazily on first use keeps startup behavior intact.
    _vectorizer = None
    _model = None
    _stop_words = None

    @classmethod
    def _resources(cls):
        """Lazily load and cache the vectorizer, classifier and stop words."""
        if cls._vectorizer is None:
            cls._vectorizer = joblib.load('../model/tfidf_model.pkl')
            cls._model = joblib.load('../model/classify_model.pkl')
            cls._stop_words = get_stopwords()
        return cls._vectorizer, cls._model, cls._stop_words

    def post(self):
        # Extract the text to classify from the JSON request body.
        request_body = json.loads(self.request.body.decode())
        text = request_body['text']
        vectorizer, model, stop_words = self._resources()
        # Tokenize, vectorize, predict, and write the result back.
        words = cut(text, stop_words)
        X = vectorizer.transform([words])
        self.write(str(model.predict(X)))


def make_app():
    """Build the Tornado application exposing the classification endpoint."""
    routes = [
        (r"/", MainHandler),
    ]
    return tornado.web.Application(routes)

if __name__ == "__main__":
    # Serve the classifier on port 5000 until the process is interrupted.
    application = make_app()
    application.listen(5000)
    tornado.ioloop.IOLoop.current().start()